library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
setwd("/Users/lundai/Desktop/RNASeqExample/")
samples <- read.csv('sample_info.csv',header = TRUE, sep = ",", quote = "\"", dec = ".", fill = TRUE, row.names = 1)
genes <- read.csv('expression_results.csv',header = TRUE, sep = ",", quote = "\"", dec = ".", fill = TRUE, row.names = 1)
d <- density(genes$KITA_02) # returns the density data
plot(density(log2(genes$KITA_02[(genes$KITA_02>0)]))) # plots the results
## 2. Density distribution of PF_BASES
d <- density(samples$PF_BASES) # returns the density data
plot(d) # plots the results
## 3. Scatterplot of two columns against one another, KITA_01 vs. KITA_03 on a log2 scale
plot(log2(genes$KITA_01[(genes$KITA_01>10 |genes$KITA_03>10 )]),log2(genes$KITA_03[(genes$KITA_01>10 |genes$KITA_03>10 )]))
## 4. Heatmap showing how well all the samples correlate
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
library(ggplot2)
samples$uid=rownames(samples)
genes_summary<-data.frame(
UID=rownames(samples),
min=minBySample <- sapply(genes, function(x) min(x[x > 0])),
max=maxBySample <- sapply(genes, function(x) max(x))
)
corr<-cor(genes)
melted_corr <- melt(corr)
head(melted_corr)
## X1 X2 value
## 1 KITA_01 KITA_01 1.0000000
## 2 KITA_02 KITA_01 0.9172325
## 3 KITA_03 KITA_01 0.7059474
## 4 KITA_04 KITA_01 0.7890462
## 5 KITA_05 KITA_01 0.8727615
## 6 KITA_06 KITA_01 0.7956106
ggplot(melted_corr , aes(x = X1, y = X2)) +
geom_raster(aes(fill = value)) +
scale_fill_gradient2(low="green", mid="white", high="red", midpoint=0.5) + theme_classic()
## 5. Hierarchal clustering dendrogram
genes_transsample <- t(genes[c(rep(FALSE,19),TRUE), ])
clusters <- hclust(dist(genes_transsample))
library('dendextend')
##
## ---------------------
## Welcome to dendextend version 1.8.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
dend <- as.dendrogram(clusters)
dend <- rotate(dend, 1:93)
dend <- color_branches(dend, k=4)
par(cex=0.5) # reduces font
plot(dend)
## 6. PCA
library('plotly')
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
setwd("/Users/lundai/Desktop/RNASeqExample/")
samples <- read.csv('sample_info.csv',header = TRUE, sep = ",", quote = "\"", dec = ".", fill = TRUE, row.names = 1)
genes <- read.csv('expression_results.csv',header = TRUE, sep = ",", quote = "\"", dec = ".", fill = TRUE, row.names = 1)
min(genes[genes>0])
## [1] 8.05e-12
# [1] 8.05e-12
genes.log <-log2(genes+8.05e-12)
genes.log.small <- genes.log[seq(1, nrow(genes.log), 20), ]
pca <- prcomp(genes.log.small,center = TRUE,scale. = TRUE)
std_dev <- pca$sdev
pr_var <- std_dev^2
pr_var[1:10]
## [1] 60.8780119 4.5662705 1.1785538 0.8067263 0.6764104 0.5586862
## [7] 0.5417689 0.5248776 0.5044897 0.5011967
prop_varex <- pr_var/sum(pr_var)
pcadf<-data.frame(pca$rotation)
samples$uid<-rownames(samples)
pcadf$uid<-rownames(pcadf)
samples<-inner_join(samples,pcadf,by="uid")
plot_ly(samples, x = ~PC2, y = ~PC3, z = ~PC4, size= ~reads,marker = list(symbol = 'circle', sizemode = 'diameter'), sizes = c(5, 25), color = ~Kit, colors = c('#F25385', '#0099E2')) %>%
add_markers() %>%
layout(scene = list(xaxis = list(title = 'PC2'),
yaxis = list(title = 'PC3'),
zaxis = list(title = 'PC4')))
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
```